* Firm organization with multiple establishments
* Appendix A: Data preparation Tables A.21-A.26

clear all 
set matsize 2000
set more off

capture log close
log using log/33_cmrh-p_replication_est_data.log, replace

********************************************************************************
***	Prepare data to compare establishment types ********************************

use data/Panel.dta if jahr >= 2000 & jahr <= 2010, clear
desc
//	Drop employee characteristics that are not needed in analyses
drop grund stib frau foreign ein_erw nbr_occ tage_wz tage_occ ///
	l_beruf f_beruf l_tentgelt f_tentgelt d_age 
//	Drop firm characteristics that are not needed in analyses
drop mean_wage verysmall
drop beruf2010 grd_dat lzt_dat ao_bula
order limit cens lnw, after(tentgelt)
	
********************************************************************************
***	Sample restriction *********************************************************
********************************************************************************

*	Minimum firm size
bys untid jahr: egen empl_unt = count(persnr)
by  untid: egen min_empl = min(empl_unt)
qui keep if min_empl >= 10
drop min_empl

*	Cleaning: social security limit
qui replace tentgelt = limit if cens == 1
drop limit cens
qui replace lnw = ln(tentgelt) 		
count if lnw == .

*	Multi-establishment firms
egen flg_estjhr = tag(betnr jahr)
bys untid jahr: egen count_est = total(flg_estjhr)
gen aux_mbu = (count_est > 1)
bys untid jahr: egen d_mbu = max(aux_mbu)
cap drop flg_untjhr
egen flg_untjhr = tag(untid jahr)

keep if count_est > 1
drop aux_mbu flg_untjhr

********************************************************************************
***	Establishment characteristics **********************************************
********************************************************************************

bys betnr jahr: egen empl_est = count(persnr)

merge m:1 betnr jahr using data/BHPinclUntID.dta
qui keep if _merge == 3
drop _merge

drop count_est_siab chge_est entry_unt jahr_eins_final jahr_eins_est az_vz eintritt ///
	betnr_vor untid_vor verysmall matchtype d_multest_4 ever_multest_4 always_multest_4 vorg_died ///
	mode austritt betnr_nach nachf_willbenew untid_nach vorgaenger nachfolger ///
	erst_jahr_eintritt jahr_lzt_est first_year_ME entry_dyn exit_dyn exit_unt add_est mode_unt sector sector2 hq_sector

order hq_kreis, after(ao_bula)

********************************************************************************
***	Layer classification based on KldB1988, managerial organization ************
********************************************************************************

tab layer, missing

bys untid jahr: egen lowest = min(layer)
bys untid jahr: egen second_lowest = min(layer) if layer > lowest
bys untid jahr: egen third_lowest = min(layer) if layer > second_lowest
bys untid jahr: egen highest = min(layer) if layer > third_lowest

gen layer_rank = .
replace layer_rank = 0 if layer == lowest
replace layer_rank = 1 if layer == second_lowest & layer_rank == .
replace layer_rank = 2 if layer == third_lowest & layer_rank == .
replace layer_rank = 3 if layer == highest & layer_rank == .
drop lowest *_lowest highest

tab2 layer_rank layer, missing

// Define ranks at establishment-level to check whether they are the similar

bys betnr jahr: egen lowest_est = min(layer)
bys betnr jahr: egen second_lowest_est = min(layer) if layer > lowest_est
bys betnr jahr: egen third_lowest_est = min(layer) if layer > second_lowest_est
bys betnr jahr: egen highest_est = min(layer) if layer > third_lowest_est

gen layer_rank_est = .
replace layer_rank_est = 0 if layer == lowest_est
replace layer_rank_est = 1 if layer == second_lowest_est & layer_rank_est == .
replace layer_rank_est = 2 if layer == third_lowest_est & layer_rank_est == .
replace layer_rank_est = 3 if layer == highest_est & layer_rank_est == .
drop lowest* *_lowest* highest*

tab2 layer_rank layer, missing
tab2 layer_rank layer_rank_est, missing

// Create dummy if layer_rank defined at establishment- and firm-level are the same

gen aux_same_rank = (layer_rank == layer_rank_est)
bys betnr jahr: egen same_rank_unt_est = min(aux_same_rank)
drop layer_rank_est


// Number of layers
egen flg_estjhrlay = tag(betnr jahr layer)
bys betnr jahr: egen count_lay_est = total(flg_estjhrlay)
drop flg_estjhrlay

// Combination of layers
sort betnr jahr
qui {
	forvalues l = 0/3 {
		egen e_`l' = anymatch(layer), values(`l')
		by betnr jahr: egen count_layer`l' = sum(e_`l')
		by betnr jahr: gen l_`l' = (count_layer`l' > 0)
		label variable l_`l' "Dummy establishment has layer `l'"
		capture drop e_`l' count_layer`l'
	}
}

capture drop comb_layer_est
gen comb_layer_est = l_3 * 1000 + l_2 * 100 + l_1 * 10 + l_0 
label variable comb_layer_est "Pattern of layers, from high to low, establishment level"
label define structure 1 "0" 10 "1" 100 "2" 1000 "3" 11 "0 + 1" 101 "0 + 2" 1001 "0 + 3" ///
	110 "1 + 2" 1010 "1 + 3" 1100 "2 + 3" 111 "0 + 1 + 2" 1101 "0 + 2 + 3" 1011 "0 + 1 + 3" ///
	1110 "1 + 2 + 3" 1111 "All layers"
label values comb_layer_est structure
drop l_*

//	Number of managerial layers
egen flg_estjhrlay = tag(betnr jahr layer_rank)
qui replace flg_estjhrlay = 0 if layer_rank == 0
bys betnr jahr: egen count_mgmt_est = total(flg_estjhrlay)
drop flg_estjhrlay

********************************************************************************
***	additional characteristics for robustness checks ***************************
********************************************************************************

preserve

//	Number of managerial layers
egen flg_untjhrlay = tag(untid jahr layer_rank)
qui replace flg_untjhrlay = 0 if layer_rank == 0
bys untid jahr: egen count_mgmt_unt = total(flg_untjhrlay)


cap drop flg_estjhr
egen flg_estjhr = tag(betnr jahr)
keep if flg_estjhr == 1

forvalues q = 10(10)90 {
	qui egen empl_dec`q' = pctile(empl_est), p(`q')
	}
qui gen dec_empl = 100
forval i = 90(-10)10 {
	qui replace dec_empl = `i' if empl_est <= empl_dec`i' & empl_dec`i' != .
	}

*	Multi-establishment firms
cap drop count_est
bys untid jahr: gen count_est = _N

keep betnr jahr count_mgmt_unt same_rank_unt_est dec_empl empl_est count_est hauptbet comb_layer_est w08_5
duplicates drop
save data/est_count_mgmt_unt_panel.dta, replace

restore

drop same_rank_unt_est


********************************************************************************
***	establishment characteristics **********************************************
********************************************************************************

// Other sums
bys betnr jahr: egen tot_wages = total(tentgelt)
bys betnr jahr: egen tot_educ = total(d_educ)
bys betnr jahr: egen tot_tenure = total(tage_bet)

// Reduce to establishment-level variables
preserve
keep betnr jahr empl_est tot_* count_lay_est comb_layer_est count_mgmt_est count_est hauptbet

duplicates drop
duplicates report betnr jahr
tab jahr, missing
label variable empl_est "# of employees in establishment"
label variable tot_wages "Wage sum in establishment"
label variable tot_educ "Education sum in establishment"
label variable count_lay_est "# of layers in establishment"
label variable count_mgmt_est "# of mgmt layers in establishment"
describe

save data/estlevel_CMRH.dta, replace
restore

********************************************************************************
***	Hierarchy characteristics **************************************************
********************************************************************************

bys betnr jahr layer_rank: egen empl_lyr = count(persnr)
bys betnr jahr layer_rank: egen wage_lyr = total(tentgelt)
bys betnr jahr layer_rank: egen educ_lyr = total(d_educ)
bys betnr jahr layer_rank: egen tenure_lyr = total(tage_bet)

egen flg_estjhrlay = tag(betnr jahr layer_rank)
keep if flg_estjhrlay == 1
drop flg_estjhrlay

keep wage_lyr empl_lyr educ_lyr tenure_lyr jahr betnr layer_rank
reshape wide wage_lyr empl_lyr educ_lyr tenure_lyr, i(jahr betnr) j(layer_rank)

********************************************************************************
***	Combine data ***************************************************************
********************************************************************************

merge 1:1 betnr jahr using data/estlevel_CMRH.dta
drop if _merge == 2
drop _merge

capture drop flg_est
capture drop flg_estjhr
capture drop flg_estjhr

egen flg_est = tag(betnr)
egen flg_estjhr = tag(betnr jahr)

xtset, clear
sort betnr jahr
xtset betnr jahr

cap drop flg_estjhr
egen flg_estjhr = tag(betnr jahr)

// add number of management layers at firm level
merge 1:1 betnr jahr using data/est_count_mgmt_unt_panel.dta
drop if _merge == 2
drop _merge


************************************
** Other supporting variables  *****
************************************
gen avg_wage=tot_wages/empl_est

*marking establishments with adjacent (consecutively ordered) layers
gen correctlyr= (comb_layer_est == 1 | comb_layer_est ==11  | comb_layer_est ==111 | comb_layer_est ==1111)
*keep if correctlyr==1 	// remove the asterisk and let this instruction be executed to 
						// produce tables with consecutively ordered layers.
						
*average wage and skill by layer
foreach v in wage educ tenure {
	forvalues l=0/3 {
		gen avg_`v'_lyr`l'=`v'_lyr`l'/empl_lyr`l'
		
	}
}
*

*log employees
gen ln_empl_est=log(empl_est)

*log average wage
gen lavgw=log(avg_wage)

sort betnr jahr
*number of layers next period
gen lnext = F.count_mgmt_est

*number of layers in two periods
gen lnext2=F2.count_mgmt_est

*setting them to -1 if the establishment exits
replace lnext=-1 if lnext==.
replace lnext2=-1 if lnext2==.

*next organizational type, and its missing values
gen comb_layer_est_next=F.comb_layer_est
replace comb_layer_est_next=-1 if comb_layer_est_next==.

sort betnr jahr

*average wage, skills in pre-existing layers and their log change
*****************************************************************
gen avg_wage_notop_next=.
gen avg_wage_notop_bef=.
gen dlog_notop=.
**if establishments are adding layers
replace avg_wage_notop_next=(F.wage_lyr0+F.wage_lyr1+F.wage_lyr2)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2) if count_mgmt_est==2 & lnext>2
replace avg_wage_notop_next=(F.wage_lyr0+F.wage_lyr1)/(F.empl_lyr0+F.empl_lyr1) if count_mgmt_est==1 & lnext>1
replace avg_wage_notop_next=(F.wage_lyr0)/(F.empl_lyr0) if count_mgmt_est==0 & lnext>0
replace dlog_notop=log(avg_wage_notop_next)-log(avg_wage) if count_mgmt_est<lnext
count if dlog_notop~=. & lnext==-1
*if establishments are dropping layers
replace avg_wage_notop_bef=(wage_lyr0+wage_lyr1+wage_lyr2)/(empl_lyr0+empl_lyr1+empl_lyr2) if count_mgmt_est>2 & lnext==2
replace avg_wage_notop_bef=(wage_lyr0+wage_lyr1)/(empl_lyr0+empl_lyr1) if count_mgmt_est>1 & lnext==1
replace avg_wage_notop_bef=(wage_lyr0)/(empl_lyr0) if count_mgmt_est>0 & lnext==0
replace dlog_notop=log(F.avg_wage)-log(avg_wage_notop_bef) if count_mgmt_est>lnext
count if dlog_notop~=. & lnext==-1
*if the establishment doesn't change layers
replace dlog_notop=log(F.avg_wage)-log(avg_wage) if lnext==count_mgmt_est

*counting whether we are covering all observed transitions	
count if dlog_notop==. & lnext~=-1

*changes in skills in pre-existing layers
foreach variable in educ tenure {
		gen avg_`variable'=tot_`variable'/empl_est
		gen avg_`variable'_notop_next=.
		gen avg_`variable'_notop_bef=.
		gen dlog_`variable'_notop=.
		**if establishments are adding layers
		replace avg_`variable'_notop_next=(F.`variable'_lyr0+F.`variable'_lyr1+F.`variable'_lyr2)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2) if count_mgmt_est==2 & lnext>2
		replace avg_`variable'_notop_next=(F.`variable'_lyr0+F.`variable'_lyr1)/(F.empl_lyr0+F.empl_lyr1) if count_mgmt_est==1 & lnext>1
		replace avg_`variable'_notop_next=(F.`variable'_lyr0)/(F.empl_lyr0) if count_mgmt_est==0 & lnext>0
		replace dlog_`variable'_notop=log(avg_`variable'_notop_next)-log(avg_`variable') if count_mgmt_est<lnext
		count if dlog_`variable'_notop~=. & lnext==-1
		*if establishments are dropping layers
		replace avg_`variable'_notop_bef=(`variable'_lyr0+`variable'_lyr1+`variable'_lyr2)/(empl_lyr0+empl_lyr1+empl_lyr2) if count_mgmt_est>2 & lnext==2
		replace avg_`variable'_notop_bef=(`variable'_lyr0+`variable'_lyr1)/(empl_lyr0+empl_lyr1) if count_mgmt_est>1 & lnext==1
		replace avg_`variable'_notop_bef=(`variable'_lyr0)/(empl_lyr0) if count_mgmt_est>0 & lnext==0
		replace dlog_notop=log(F.avg_`variable')-log(avg_`variable'_notop_bef) if count_mgmt_est>lnext
		count if dlog_`variable'_notop~=. & lnext==-1
		*if the establishment doesn't change layers
		replace dlog_`variable'_notop=log(F.avg_`variable')-log(avg_`variable') if lnext==count_mgmt_est

		*counting whether we are covering all observed transitions	
		count if dlog_`variable'_notop==. & lnext~=-1
	}
*

* generating normalized empl
gen norm_empl_lyr0=.
gen norm_empl_lyr1=.
gen norm_empl_lyr2=.
gen norm_empl_lyr3=.
forvalues l=0/3 {
		forvalues c=0/`l' {
			gen temp = empl_lyr`c'/empl_lyr`l' if count_mgmt_est==`l'
			replace norm_empl_lyr`c' = temp if count_mgmt_est==`l'
			drop temp
		}		
}
*
egen norm_empl=rowtotal(norm_empl*)

*log changes of layer-level variables
sort betnr jahr
foreach v in avg_wage_lyr norm_empl_lyr empl_lyr avg_educ_lyr avg_tenure_lyr {

	forvalues l=0/3 {
		gen log_`v'`l' = log(`v'`l')
		
		gen `v'`l'_next=F.`v'`l'
		gen dlog_`v'`l'=F.log_`v'`l'-log_`v'`l'
		
		*two periods ahead vs. before the change
		gen dlog02_`v'`l'=F2.log_`v'`l'-log_`v'`l'
		*two periods ahead vs. one period ahead
		gen dlog12_`v'`l'=F2.log_`v'`l'-F.log_`v'`l'
		
	}

}


*
*direction of change
foreach v in empl_est norm_empl avg_wage {
	gen dir_`v' =.
	replace dir_`v'=1  if F.`v'>`v' & F.`v'~=.
	replace dir_`v'=0  if F.`v'==`v' & F.`v'~=.
	replace dir_`v'=-1 if F.`v'<`v' & `v'~=.
	gen dlog_`v'=log(F.`v')-log(`v')
	count if lnext~=-1 & dir_`v'==.

}
*

*change in wage at the top
gen dlog_avg_wage_top=.
forvalues b=0/3 {
	forvalues a=0/3 {

		replace dlog_avg_wage_top=log(avg_wage_lyr`a'_next)-log(avg_wage_lyr`b') if count_mgmt_est==`b' & lnext==`a'
		
	}
}
*change in employment at the top
gen dlog_empl_top=.
forvalues b=0/3 {
	forvalues a=0/3 {

		replace dlog_empl_top=log(empl_lyr`a'_next)-log(empl_lyr`b') if count_mgmt_est==`b' & lnext==`a'
		
	}
}
*	

*detrended wage: removing mean wage change by jahr.
gen dlog_wage_detrended=.
forvalues y=1998/2008 {
	sum dlog_avg_wage if jahr==`y'
	replace dlog_wage_detrended=dlog_avg_wage-r(mean) if jahr==`y'
}
*


*variables for decomposition of changes in average wage
********************************************************

*share of hours in common layers, establishments that add layers
gen sharehours_notop_next=.

replace sharehours_notop_next=(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2+F.empl_lyr3) if count_mgmt_est==2 & lnext==3

replace sharehours_notop_next=(F.empl_lyr0+F.empl_lyr1)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2) if count_mgmt_est==1 & lnext==2
replace sharehours_notop_next=(F.empl_lyr0+F.empl_lyr1)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2+F.empl_lyr3) if count_mgmt_est==1 & lnext==3

replace sharehours_notop_next=(F.empl_lyr0)/(F.empl_lyr0+F.empl_lyr1) if count_mgmt_est==0 & lnext==1
replace sharehours_notop_next=(F.empl_lyr0)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2) if count_mgmt_est==0 & lnext==2
replace sharehours_notop_next=(F.empl_lyr0)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2+F.empl_lyr3) if count_mgmt_est==0 & lnext==3


*share of hours in common layers, establishments that drop layers
gen sharehours_notop_bef=.

replace sharehours_notop_bef=(empl_lyr0+empl_lyr1+empl_lyr2)/(empl_lyr0+empl_lyr1+empl_lyr2+empl_lyr3) if count_mgmt_est==3 & lnext==2

replace sharehours_notop_bef=(empl_lyr0+empl_lyr1)/(empl_lyr0+empl_lyr1+empl_lyr2) if count_mgmt_est==2 & lnext==1
replace sharehours_notop_bef=(empl_lyr0+empl_lyr1)/(empl_lyr0+empl_lyr1+empl_lyr2+empl_lyr3) if count_mgmt_est==3 & lnext==1

replace sharehours_notop_bef=(empl_lyr0)/(empl_lyr0+empl_lyr1) if count_mgmt_est==1 & lnext==0
replace sharehours_notop_bef=(empl_lyr0)/(empl_lyr0+empl_lyr1+empl_lyr2) if count_mgmt_est==2 & lnext==0
replace sharehours_notop_bef=(empl_lyr0)/(empl_lyr0+empl_lyr1+empl_lyr2+empl_lyr3) if count_mgmt_est==3 & lnext==0


*avg_wage in added layers
gen avg_wage_top_next=.

replace avg_wage_top_next=(F.wage_lyr3)/(F.empl_lyr3) if count_mgmt_est==2 & lnext==3

replace avg_wage_top_next=(F.wage_lyr2)/(F.empl_lyr2) if count_mgmt_est==1 & lnext==2
replace avg_wage_top_next=(F.wage_lyr2+F.wage_lyr3)/(F.empl_lyr2+F.empl_lyr3) if count_mgmt_est==1 & lnext==3

replace avg_wage_top_next=(F.wage_lyr1)/(F.empl_lyr1) if count_mgmt_est==0 & lnext==1
replace avg_wage_top_next=(F.wage_lyr1+F.wage_lyr2)/(F.empl_lyr1+F.empl_lyr2) if count_mgmt_est==0 & lnext==2
replace avg_wage_top_next=(F.wage_lyr1+F.wage_lyr2+F.wage_lyr3)/(F.empl_lyr1+F.empl_lyr2+F.empl_lyr3) if count_mgmt_est==0 & lnext==3


*avg_wage in dropped layers
gen avg_wage_top_bef=.

replace avg_wage_top_bef=(wage_lyr3)/(empl_lyr3) if count_mgmt_est==3 & lnext==2

replace avg_wage_top_bef=(wage_lyr2)/(empl_lyr2) if count_mgmt_est==2 & lnext==1
replace avg_wage_top_bef=(wage_lyr2+wage_lyr3)/(empl_lyr2+empl_lyr3) if count_mgmt_est==3 & lnext==1

replace avg_wage_top_bef=(wage_lyr1)/(empl_lyr1) if count_mgmt_est==1 & lnext==0
replace avg_wage_top_bef=(wage_lyr1+wage_lyr2)/(empl_lyr1+empl_lyr2) if count_mgmt_est==2 & lnext==0
replace avg_wage_top_bef=(wage_lyr1+wage_lyr2+wage_lyr3)/(empl_lyr1+empl_lyr2+empl_lyr3) if count_mgmt_est==3 & lnext==0

*checking whether the avg wage implied by these new variables coincides 
*with the correct avg wage, for establishments adding layers
gen avg_wage_next=avg_wage_notop_next*sharehours_notop_next+avg_wage_top_next*(1-sharehours_notop_next)
gen diff=avg_wage_next-F.avg_wage
sum diff
replace diff=100*diff/F.avg_wage
sum diff
count if avg_wage_next~=F.avg_wage & avg_wage_next~=.
count if avg_wage_next~=.
drop diff


*checking whether the avg wage implied by these new variables coincides with 
*the correct avg hourly wage, for establishments dropping layers
gen avg_wage_bef=avg_wage_notop_bef*sharehours_notop_bef+avg_wage_top_bef*(1-sharehours_notop_bef)
gen diff=avg_wage_bef-avg_wage
sum diff
replace diff=100*diff/avg_wage
sum diff
count if avg_wage_bef~=avg_wage & avg_wage_bef~=.
drop diff


*wage decomposition
gen common=avg_wage_notop_next/avg_wage if count_mgmt_est<lnext
gen added=avg_wage_top_next/avg_wage if count_mgmt_est<lnext
drop avg_wage_next

gen common_bef=avg_wage_notop_bef/F.avg_wage if count_mgmt_est>lnext
gen common_bef1=1/common_bef //to make it comparable to tables 
gen dropped=avg_wage_top_bef/F.avg_wage if count_mgmt_est>lnext
drop avg_wage_bef

*generating detrended variables
foreach s in avg_wage_lyr norm_empl_lyr empl_lyr avg_educ_lyr avg_tenure_lyr {
		forvalues l=0/3 {
			gen det_dlog_`s'`l'=.
			forvalues y=1998/2010 {
				sum dlog_`s'`l' if jahr==`y'
				replace det_dlog_`s'`l'=dlog_`s'`l'-r(mean) if jahr==`y'
			}
			
		}
	
	}
	
gen det_dlog_avg_wage_top = .
forvalues l = 0/3 {
replace det_dlog_avg_wage_top = det_dlog_avg_wage_lyr`l' if count_mgmt_est == `l'
}

compress 
save "data/CMRH_2000-2010_p_est_layer-firm.dta", replace

********************************************************************************
********************************************************************************

log close
